This is an exploratory analysis on three classes of students scores in Indonesian language subject. These classes were under the tutelage of A. N. in MAN 3 Lebak and I hope I can discover something of value from this analysis.
The data consists of prepared tables that I had processed in excel, they are:
harian table, that presents 4 sets of minor tests
result that each consists of proyek,
portofolio, and praktek;
phbo pat table that presents 2 sets of major (half
and last semester) tests result.
library(readxl)
library(tidyverse)
library(tidymodels)
library(encryptr)
library(janitor)
library(skimr)
library(plotly)
library(glue)
library(GGally)
library(tidytext)
library(FactoMineR)
library(ggiraphExtra)
library(factoextra)
harian <- read_excel("edit_bi_kelas_noni.xlsx",
sheet = "harian")
semesteran <- read_excel("edit_bi_kelas_noni.xlsx",
sheet = "phbo pat")
Clean the column names.
harian <- clean_names(harian)
semesteran <- clean_names(semesteran)
Encrypt the name of students to protect privacy.
#genkeys()
Create codes for names.
names_encryption <-
harian %>%
select(nama_siswa) %>%
unique() %>%
mutate(
name_code = nama_siswa
) %>%
encrypt(name_code)
Replace real names in nama_siswa with the codes.
harian_encrypted <-
harian %>%
left_join(names_encryption,
by = c("nama_siswa" = "nama_siswa"),
keep = F) %>%
select(-nama_siswa) %>%
select(name_code, everything())
harian_encrypted %>% head()
## # A tibble: 6 x 8
## name_code kelas penil~1 materi nilai~2 proyek praktek porto~3
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2d10f59d016483c1cce2a1ff0~ XI I~ Harian~ Propo~ 78 78 78 78
## 2 a7bb645682646965a7b3998ea~ XI I~ Harian~ Propo~ 78 78 78 78
## 3 13d13d8ba02bea54041a02584~ XI I~ Harian~ Propo~ 78 78 78 78
## 4 a983d069d7b7ddab8cddc8fa8~ XI I~ Harian~ Propo~ 78 78 78 78
## 5 3ec8b7095e42dddd7c1bedeb2~ XI I~ Harian~ Propo~ 81 80 83 80
## 6 7d8a0ad9554e63d9cb507d5d5~ XI I~ Harian~ Propo~ 78 78 78 78
## # ... with abbreviated variable names 1: penilaian, 2: nilai_ph, 3: portofolio
Compare total unique nama_siswa and unique
name_code to check.
n_distinct(harian$nama_siswa)
## [1] 82
n_distinct(harian_encrypted$name_code)
## [1] 82
Encrypt semesteran table as well.
semesteran_encrypted <-
semesteran %>%
left_join(names_encryption,
by = c("nama_siswa" = "nama_siswa"),
keep = F) %>%
select(-nama_siswa) %>%
select(name_code, everything())
semesteran_encrypted %>% head()
## # A tibble: 6 x 4
## name_code kelas penil~1 nilai
## <chr> <chr> <chr> <dbl>
## 1 2d10f59d016483c1cce2a1ff066a05fc8164028f9df861ffbd105fc1a~ XI I~ PHBO 78
## 2 a7bb645682646965a7b3998ea7e088f0b92d785f5ce846543bd924b8a~ XI I~ PHBO 78
## 3 13d13d8ba02bea54041a02584536224960dce9f9799ad5fe6ded7602d~ XI I~ PHBO 78
## 4 a983d069d7b7ddab8cddc8fa8be7aa2d6ed1ea3be890a68e187e025f5~ XI I~ PHBO 78
## 5 3ec8b7095e42dddd7c1bedeb26ca5ebd32b1ba608295ea21174b4fcf3~ XI I~ PHBO 98
## 6 7d8a0ad9554e63d9cb507d5d5a8c4b9fabf8aad4f8589160141dbf317~ XI I~ PHBO 78
## # ... with abbreviated variable name 1: penilaian
n_distinct(semesteran$nama_siswa)
## [1] 82
n_distinct(semesteran_encrypted$name_code)
## [1] 82
I had processed it in excel, but let’s check missing data again.
anyNA(harian_encrypted)
## [1] FALSE
anyNA(semesteran_encrypted)
## [1] FALSE
Shuffle the row so the real names can’t be identified using the order of the original rows.
harian_encrypted <- harian_encrypted %>% slice_sample(prop = 1)
semesteran_encrypted <- semesteran_encrypted %>% slice_sample(prop = 1)
glimpse(harian_encrypted)
## Rows: 328
## Columns: 8
## $ name_code <chr> "664903989b60c806285a1419335b6687beee051dc0b33c3e6d55b71722~
## $ kelas <chr> "XI IPS 1", "XI MIPA", "XI IPS 2", "XI MIPA", "XI MIPA", "X~
## $ penilaian <chr> "Harian 4", "Harian 2", "Harian 2", "Harian 2", "Harian 2",~
## $ materi <chr> "Drama", "Karya Ilmiah", "Karya Ilmiah", "Karya Ilmiah", "K~
## $ nilai_ph <dbl> 85.00000, 86.00000, 78.00000, 86.00000, 78.00000, 85.00000,~
## $ proyek <dbl> 85, 90, 78, 90, 78, 85, 78, 78, 85, 78, 78, 78, 78, 78, 78,~
## $ praktek <dbl> 85, 78, 78, 78, 78, 85, 78, 78, 85, 78, 78, 78, 78, 78, 78,~
## $ portofolio <dbl> 85, 90, 78, 90, 78, 85, 78, 78, 85, 78, 78, 78, 78, 78, 78,~
glimpse(semesteran_encrypted)
## Rows: 164
## Columns: 4
## $ name_code <chr> "13d13d8ba02bea54041a02584536224960dce9f9799ad5fe6ded7602df6~
## $ kelas <chr> "XI IPS 2", "XI MIPA", "XI IPS 1", "XI IPS 1", "XI IPS 1", "~
## $ penilaian <chr> "PAT", "PHBO", "PAT", "PHBO", "PHBO", "PAT", "PAT", "PHBO", ~
## $ nilai <dbl> 85, 78, 85, 88, 78, 86, 84, 78, 85, 83, 86, 90, 84, 90, 83, ~
We can join the tables with name_code.
data <-
harian_encrypted %>% left_join(semesteran_encrypted[,c(1, 3, 4)],
by = "name_code",
suffix = c("_h", "_s"))
glimpse(data)
## Rows: 656
## Columns: 10
## $ name_code <chr> "664903989b60c806285a1419335b6687beee051dc0b33c3e6d55b7172~
## $ kelas <chr> "XI IPS 1", "XI IPS 1", "XI MIPA", "XI MIPA", "XI IPS 2", ~
## $ penilaian_h <chr> "Harian 4", "Harian 4", "Harian 2", "Harian 2", "Harian 2"~
## $ materi <chr> "Drama", "Drama", "Karya Ilmiah", "Karya Ilmiah", "Karya I~
## $ nilai_ph <dbl> 85, 85, 86, 86, 78, 78, 86, 86, 78, 78, 85, 85, 78, 78, 78~
## $ proyek <dbl> 85, 85, 90, 90, 78, 78, 90, 90, 78, 78, 85, 85, 78, 78, 78~
## $ praktek <dbl> 85, 85, 78, 78, 78, 78, 78, 78, 78, 78, 85, 85, 78, 78, 78~
## $ portofolio <dbl> 85, 85, 90, 90, 78, 78, 90, 90, 78, 78, 85, 85, 78, 78, 78~
## $ penilaian_s <chr> "PAT", "PHBO", "PHBO", "PAT", "PHBO", "PAT", "PAT", "PHBO"~
## $ nilai <dbl> 87, 95, 89, 84, 78, 78, 85, 91, 83, 95, 86, 85, 90, 86, 94~
Convert all the chr columns except name_code, to factor.
data <- data %>%
mutate(across(.cols = c(where(is.character), -name_code),.fns = as.factor))
glimpse(data)
## Rows: 656
## Columns: 10
## $ name_code <chr> "664903989b60c806285a1419335b6687beee051dc0b33c3e6d55b7172~
## $ kelas <fct> XI IPS 1, XI IPS 1, XI MIPA, XI MIPA, XI IPS 2, XI IPS 2, ~
## $ penilaian_h <fct> Harian 4, Harian 4, Harian 2, Harian 2, Harian 2, Harian 2~
## $ materi <fct> Drama, Drama, Karya Ilmiah, Karya Ilmiah, Karya Ilmiah, Ka~
## $ nilai_ph <dbl> 85, 85, 86, 86, 78, 78, 86, 86, 78, 78, 85, 85, 78, 78, 78~
## $ proyek <dbl> 85, 85, 90, 90, 78, 78, 90, 90, 78, 78, 85, 85, 78, 78, 78~
## $ praktek <dbl> 85, 85, 78, 78, 78, 78, 78, 78, 78, 78, 85, 85, 78, 78, 78~
## $ portofolio <dbl> 85, 85, 90, 90, 78, 78, 90, 90, 78, 78, 85, 85, 78, 78, 78~
## $ penilaian_s <fct> PAT, PHBO, PHBO, PAT, PHBO, PAT, PAT, PHBO, PAT, PHBO, PAT~
## $ nilai <dbl> 87, 95, 89, 84, 78, 78, 85, 91, 83, 95, 86, 85, 90, 86, 94~
skim(data = data)
| Name | data |
| Number of rows | 656 |
| Number of columns | 10 |
| _______________________ | |
| Column type frequency: | |
| character | 1 |
| factor | 4 |
| numeric | 5 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| name_code | 0 | 1 | 512 | 512 | 0 | 82 | 0 |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| kelas | 0 | 1 | FALSE | 3 | XI : 256, XI : 208, XI : 192 |
| penilaian_h | 0 | 1 | FALSE | 4 | Har: 164, Har: 164, Har: 164, Har: 164 |
| materi | 0 | 1 | FALSE | 4 | Dra: 164, Kar: 164, Pro: 164, Res: 164 |
| penilaian_s | 0 | 1 | FALSE | 2 | PAT: 328, PHB: 328 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| nilai_ph | 0 | 1 | 79.92 | 3.15 | 78 | 78 | 78 | 81.67 | 92.67 | ▇▁▂▁▁ |
| proyek | 0 | 1 | 80.19 | 4.01 | 78 | 78 | 78 | 80.00 | 100.00 | ▇▂▁▁▁ |
| praktek | 0 | 1 | 79.70 | 3.03 | 78 | 78 | 78 | 80.00 | 90.00 | ▇▁▂▁▁ |
| portofolio | 0 | 1 | 79.88 | 3.83 | 78 | 78 | 78 | 80.00 | 100.00 | ▇▁▁▁▁ |
| nilai | 0 | 1 | 84.57 | 5.33 | 78 | 78 | 85 | 88.00 | 98.00 | ▇▇▅▂▁ |
summary(data)
## name_code kelas penilaian_h materi
## Length:656 XI IPS 1:208 Harian 1:164 Drama :164
## Class :character XI IPS 2:192 Harian 2:164 Karya Ilmiah :164
## Mode :character XI MIPA :256 Harian 3:164 Proposal Kegiatan:164
## Harian 4:164 Resensi :164
##
##
## nilai_ph proyek praktek portofolio penilaian_s
## Min. :78.00 Min. : 78.00 Min. :78.0 Min. : 78.00 PAT :328
## 1st Qu.:78.00 1st Qu.: 78.00 1st Qu.:78.0 1st Qu.: 78.00 PHBO:328
## Median :78.00 Median : 78.00 Median :78.0 Median : 78.00
## Mean :79.92 Mean : 80.19 Mean :79.7 Mean : 79.88
## 3rd Qu.:81.67 3rd Qu.: 80.00 3rd Qu.:80.0 3rd Qu.: 80.00
## Max. :92.67 Max. :100.00 Max. :90.0 Max. :100.00
## nilai
## Min. :78.00
## 1st Qu.:78.00
## Median :85.00
## Mean :84.57
## 3rd Qu.:88.00
## Max. :98.00
data <- data %>%
pivot_longer(cols = 6:8,
names_to = "ppp",
values_to = "nilai_ppp")
data_mean_ph <-
data %>%
group_by(kelas, penilaian_h, materi, ppp) %>%
summarise(rata2_ph = round(mean(nilai_ph),digits = 2)) %>%
mutate(avg = glue("Nilai Rata-rata {rata2_ph}"))
acols <- c("Harian 1" = "#C6E0FF",
"Harian 2" = "#579A9E",
"Harian 3" = "#3292C3",
"Harian 4" = "#BCAB79")
rata.plot <- ggplot(data_mean_ph, aes(y = kelas, x = rata2_ph, fill = penilaian_h))+
geom_col(aes(text = avg))+
facet_wrap(~penilaian_h)+
theme_minimal()+
theme(legend.position = "none",
axis.title.y = element_blank())+
labs(title = "Perbandingan Rata-rata Nilai Harian Pelajaran Bahasa",
subtitle = "2020/2021 Semester Genap",
x = "Nilai")+
scale_fill_manual(
values = acols
)+
xlim(0, 100)
#rata.plot
ggplotly(rata.plot, tooltip = "text")
data <- data %>%
mutate( # repair ppp content
ppp = str_replace(ppp, "p", "P" )
) %>%
mutate( # limit digits
nilai_ph = round(nilai_ph, digits = 2)
) %>%
mutate( # creating tooltip
exp = glue(
"{kelas}
{penilaian_h}: {nilai_ph}
{ppp}: {nilai_ppp}"
)
)
fcols <- c("XI IPS 1" = "#BEEF9E",
"XI IPS 2" = "#3292C3",
"XI MIPA" = "#BCAB79")
ppp.plot <- ggplot(data, aes(y = materi,
x = nilai_ppp,
fill = kelas))+
geom_col(position = position_dodge(width = 0.95),
aes(text = exp))+
facet_wrap(~ppp)+
theme_minimal()+
theme(legend.position = "none",
axis.title.y = element_blank())+
labs(title = "Perbandingan Rincian Nilai Tugas Pelajaran Bahasa",
subtitle = "2020/2021 Semester Genap",
x = "Nilai")+
scale_fill_manual(
values = fcols
)
#ppp.plot
ggplotly(ppp.plot, tooltip = "text")
hxp.plot <-
ggplot(data,
aes(x = nilai_ppp,
y = nilai_ph))+
geom_jitter(aes(col = kelas,
text = exp),
width = 0.3,
height = 0.3,
alpha = 0.7)+
theme_minimal()+
labs(title = "Hubungan Nilai PPP dan Penilaian Harian",
x = "Nilai PPP",
y = "Penilaian Harian")+
scale_color_manual(
values = fcols
)+
theme(legend.position = "none",
axis.title.y = element_blank())
#hxp.plot
ggplotly(hxp.plot, tooltip = "text")
corp3ph.kelas.materi <- data %>%
group_by(kelas, materi) %>%
summarize(correlation = cor(nilai_ppp, nilai_ph))
corp3ph.kelas.materi
## # A tibble: 12 x 3
## # Groups: kelas [3]
## kelas materi correlation
## <fct> <fct> <dbl>
## 1 XI IPS 1 Drama 1
## 2 XI IPS 1 Karya Ilmiah 0.806
## 3 XI IPS 1 Proposal Kegiatan 0.813
## 4 XI IPS 1 Resensi 0.918
## 5 XI IPS 2 Drama 1
## 6 XI IPS 2 Karya Ilmiah 0.805
## 7 XI IPS 2 Proposal Kegiatan 0.881
## 8 XI IPS 2 Resensi 0.817
## 9 XI MIPA Drama 1
## 10 XI MIPA Karya Ilmiah 0.771
## 11 XI MIPA Proposal Kegiatan 0.819
## 12 XI MIPA Resensi 0.882
corp3ph.kelas.materi %>% slice_max(correlation)
## # A tibble: 3 x 3
## # Groups: kelas [3]
## kelas materi correlation
## <fct> <fct> <dbl>
## 1 XI IPS 1 Drama 1
## 2 XI IPS 2 Drama 1
## 3 XI MIPA Drama 1
corp3ph.kelas.materi %>% slice_min(correlation)
## # A tibble: 3 x 3
## # Groups: kelas [3]
## kelas materi correlation
## <fct> <fct> <dbl>
## 1 XI IPS 1 Karya Ilmiah 0.806
## 2 XI IPS 2 Karya Ilmiah 0.805
## 3 XI MIPA Karya Ilmiah 0.771
cor.plot.k.m <-
ggplot(corp3ph.kelas.materi,
aes(x = materi,
y = correlation,
text = kelas))+
geom_col(aes(fill = kelas),
position = position_dodge()
)+
theme_minimal()+
labs(title = "Korelasi Materi dan Nilai Harian",
x = "Materi")+
scale_fill_manual(
values = fcols
)+
theme(legend.position = "none",
axis.title.y = element_blank())
#cor.plot.k.m
ggplotly(cor.plot.k.m, tooltip = "text")
corp3ph.kelas.p3 <- data %>%
group_by(kelas, ppp) %>%
summarize(correlation = cor(nilai_ppp, nilai_ph))
corp3ph.kelas.p3
## # A tibble: 9 x 3
## # Groups: kelas [3]
## kelas ppp correlation
## <fct> <chr> <dbl>
## 1 XI IPS 1 Portofolio 0.921
## 2 XI IPS 1 Praktek 0.732
## 3 XI IPS 1 Proyek 0.960
## 4 XI IPS 2 Portofolio 0.902
## 5 XI IPS 2 Praktek 0.731
## 6 XI IPS 2 Proyek 0.962
## 7 XI MIPA Portofolio 0.937
## 8 XI MIPA Praktek 0.621
## 9 XI MIPA Proyek 0.949
corp3ph.kelas.p3 %>% slice_max(correlation)
## # A tibble: 3 x 3
## # Groups: kelas [3]
## kelas ppp correlation
## <fct> <chr> <dbl>
## 1 XI IPS 1 Proyek 0.960
## 2 XI IPS 2 Proyek 0.962
## 3 XI MIPA Proyek 0.949
corp3ph.kelas.p3 %>% slice_min(correlation)
## # A tibble: 3 x 3
## # Groups: kelas [3]
## kelas ppp correlation
## <fct> <chr> <dbl>
## 1 XI IPS 1 Praktek 0.732
## 2 XI IPS 2 Praktek 0.731
## 3 XI MIPA Praktek 0.621
cor.plot.k.p <-
ggplot(corp3ph.kelas.p3, aes(ppp, correlation))+
geom_col(aes(fill = kelas, text = kelas),
position = position_dodge()
)+
theme_minimal()+
labs(title = "Korelasi Penilaian Ranah Keterampilan dan Penilaian Harian",
subtitle = "Berdasarkan Proyek/Praktek/Portfolio",
x = "Ranah Keterampilan")+
scale_fill_manual(
values = fcols
)+
theme(legend.position = "none",
axis.title.y = element_blank())
#cor.plot.k.p
ggplotly(cor.plot.k.p, tooltip = "text")
plot.semester.k <-
ggplot(data, aes(x = nilai, text = exp))+
geom_histogram(aes(fill = kelas),
binwidth = 1)+
facet_wrap(~penilaian_s + kelas)+
theme_minimal()+
theme(legend.position = "none")+
labs(title = "Perbandingan Sebaran Nilai Semesteran Antar Kelas",
x = "Nilai",
y = "Jumlah Siswa")+
scale_fill_manual(
values = fcols
)
#plot.semester.k
ggplotly(plot.semester.k, tooltip = "text")
k.kelas.ph.s <-
data %>%
group_by(kelas, materi) %>%
summarize(correlation = cor(nilai_ph, nilai))
k.kelas.ph.s
## # A tibble: 12 x 3
## # Groups: kelas [3]
## kelas materi correlation
## <fct> <fct> <dbl>
## 1 XI IPS 1 Drama 0.557
## 2 XI IPS 1 Karya Ilmiah 0.396
## 3 XI IPS 1 Proposal Kegiatan 0.481
## 4 XI IPS 1 Resensi 0.686
## 5 XI IPS 2 Drama 0.285
## 6 XI IPS 2 Karya Ilmiah 0.182
## 7 XI IPS 2 Proposal Kegiatan 0.588
## 8 XI IPS 2 Resensi 0.400
## 9 XI MIPA Drama 0.212
## 10 XI MIPA Karya Ilmiah 0.338
## 11 XI MIPA Proposal Kegiatan 0.330
## 12 XI MIPA Resensi 0.527
bcols <- c("Drama" = "#C6E0FF",
"Karya Ilmiah" = "#579A9E",
"Proposal Kegiatan" = "#3292C3",
"Resensi" = "#BCAB79")
plot.k.h.s <-
ggplot(k.kelas.ph.s, aes(x = kelas,
y = correlation,
fill = materi,
text = materi))+
geom_col(position = position_dodge())+
scale_fill_manual(
values = bcols
)+
theme_minimal()+
theme(legend.position = "none",
axis.title.x = element_blank(),
axis.title.y = element_blank())+
labs(title = "Korelasi Nilai Harian dan Semesteran",
subtitle = "Berdasarkan Kelas dan Materi")
#plot.k.h.s
ggplotly(plot.k.h.s, tooltip = "text")
k.kelas.ppp.s <-
data %>%
group_by(ppp, kelas) %>%
summarize(correlation = cor(nilai_ppp, nilai)) %>%
arrange(desc(correlation))
k.kelas.ppp.s
## # A tibble: 9 x 3
## # Groups: ppp [3]
## ppp kelas correlation
## <chr> <fct> <dbl>
## 1 Proyek XI IPS 1 0.512
## 2 Portofolio XI IPS 1 0.469
## 3 Praktek XI IPS 1 0.363
## 4 Proyek XI MIPA 0.344
## 5 Praktek XI IPS 2 0.330
## 6 Portofolio XI MIPA 0.304
## 7 Proyek XI IPS 2 0.294
## 8 Portofolio XI IPS 2 0.253
## 9 Praktek XI MIPA 0.202
plot.ppp.k.s <-
ggplot(k.kelas.ppp.s, aes(x = ppp,
y = correlation,
fill = kelas,
text = kelas))+
geom_col(position = position_dodge())+
scale_fill_manual(
values = fcols
)+
theme_minimal()+
theme(legend.position = "none",
axis.title.x = element_blank(),
axis.title.y = element_blank())+
labs(title = "Korelasi Penilaian Ranah Keterampilan dan Nilai Semesteran",
subtitle = "Berdasarkan Kelas")
#plot.ppp.k.s
ggplotly(plot.ppp.k.s, tooltip = "text")
data_edit_col <-
data %>%
mutate("Nilai Ranah Keterampilan" = nilai_ppp,
"Nilai Harian" = nilai_ph,
"Nilai Semesteran" = nilai,
"Kelas" = kelas)
p.par.nilai <-
ggparcoord(data_edit_col,
columns = c("Nilai Ranah Keterampilan",
"Nilai Harian",
"Nilai Semesteran"),
groupColumn = "Kelas",
scale = "uniminmax",
order = c(11,12,13),
showPoints = TRUE,
splineFactor = 4,
title = "Paralel Plot Nilai Bahasa Indonesia",
alphaLines = 0.6)+
scale_color_manual(values = fcols) +
theme(
axis.title.x = element_blank()
)+
theme_minimal()+
labs(y = "Scaled with UniMinMax",
col = "Kelas")
ggplotly(p.par.nilai, tooltip = "Kelas")
s.p3.h.s <-
data %>%
group_by(ppp, penilaian_h, penilaian_s) %>%
summarise(kor_ppp_ph = cor(nilai_ppp, nilai_ph),
kor_ph_s = cor(nilai_ph, nilai))
s.p3.h.s
## # A tibble: 24 x 5
## # Groups: ppp, penilaian_h [12]
## ppp penilaian_h penilaian_s kor_ppp_ph kor_ph_s
## <chr> <fct> <fct> <dbl> <dbl>
## 1 Portofolio Harian 1 PAT 0.930 0.437
## 2 Portofolio Harian 1 PHBO 0.930 0.490
## 3 Portofolio Harian 2 PAT 1.00 0.257
## 4 Portofolio Harian 2 PHBO 1.00 0.377
## 5 Portofolio Harian 3 PAT 0.785 0.533
## 6 Portofolio Harian 3 PHBO 0.785 0.583
## 7 Portofolio Harian 4 PAT 1 0.364
## 8 Portofolio Harian 4 PHBO 1 0.381
## 9 Praktek Harian 1 PAT 0.653 0.437
## 10 Praktek Harian 1 PHBO 0.653 0.490
## # ... with 14 more rows
## # i Use `print(n = ...)` to see more rows
Impute missing values.
imputed.s.p3.h.s <-
recipe(~ ., data = s.p3.h.s) %>%
step_impute_bag(kor_ppp_ph) %>%
prep(s.p3.h.s)
imputed.s.p3.h.s
## Recipe
##
## Inputs:
##
## role #variables
## predictor 5
##
## Training data contained 24 data points and 2 incomplete rows.
##
## Operations:
##
## Bagged tree imputation for kor_ppp_ph [trained]
df.imputed.s.p3.h.s <-
bake(imputed.s.p3.h.s, new_data = s.p3.h.s)
df.imputed.s.p3.h.s
## # A tibble: 24 x 5
## # Groups: ppp, penilaian_h [12]
## ppp penilaian_h penilaian_s kor_ppp_ph kor_ph_s
## <fct> <fct> <fct> <dbl> <dbl>
## 1 Portofolio Harian 1 PAT 0.930 0.437
## 2 Portofolio Harian 1 PHBO 0.930 0.490
## 3 Portofolio Harian 2 PAT 1.00 0.257
## 4 Portofolio Harian 2 PHBO 1.00 0.377
## 5 Portofolio Harian 3 PAT 0.785 0.533
## 6 Portofolio Harian 3 PHBO 0.785 0.583
## 7 Portofolio Harian 4 PAT 1 0.364
## 8 Portofolio Harian 4 PHBO 1 0.381
## 9 Praktek Harian 1 PAT 0.653 0.437
## 10 Praktek Harian 1 PHBO 0.653 0.490
## # ... with 14 more rows
## # i Use `print(n = ...)` to see more rows
anyNA(df.imputed.s.p3.h.s)
## [1] FALSE
pcols = c(
"Portofolio" = "#BEEF9E",
"Praktek" = "#3292C3",
"Proyek" = "#BCAB79"
)
p.dens <-
ggplot(df.imputed.s.p3.h.s, aes(kor_ppp_ph, kor_ph_s)) +
stat_density2d(geom="tile", aes(fill = ..density..), contour = FALSE) +
geom_point(size = 6, color = "white")+
geom_point(aes(col=ppp, text = penilaian_s), size = 3)+
labs(title = "Hubungan Nilai Korelasi",
x = "Korelasi Nilai Ranah Keterampilan dan Nilai Harian",
y = "Korelasi Nilai Harian dan Nilai Semesteran",
color = "Keterampilan",
fill = "Density"
)+
theme_minimal()+
scale_color_manual(values = pcols)
#p.dens
ggplotly(p.dens, tooltip = "text")
uk.ppp.s <-
data %>%
select(kelas, nilai_ppp, nilai) %>%
nest(data = c(nilai_ppp, nilai)) %>%
mutate(
test = map(data, ~ cor.test(.$nilai_ppp,
.$nilai)),
tidied = map(test, tidy)
) %>%
unnest(cols = tidied) %>%
select(-data, -test)
uk.ppp.s
## # A tibble: 3 x 9
## kelas estimate statistic p.value parameter conf.low conf.~1 method alter~2
## <fct> <dbl> <dbl> <dbl> <int> <dbl> <dbl> <chr> <chr>
## 1 XI IPS 1 0.451 12.6 1.25e-32 622 0.386 0.512 Pears~ two.si~
## 2 XI MIPA 0.288 8.31 4.31e-16 766 0.221 0.351 Pears~ two.si~
## 3 XI IPS 2 0.289 7.23 1.58e-12 574 0.212 0.362 Pears~ two.si~
## # ... with abbreviated variable names 1: conf.high, 2: alternative
uk.ph.s <-
data %>%
select(kelas, nilai_ph, nilai) %>%
nest(data = c(nilai_ph, nilai)) %>%
mutate(
test = map(data, ~ cor.test(.$nilai_ph,
.$nilai)),
tidied = map(test, tidy)
) %>%
unnest(cols = tidied) %>%
select(-data, -test)
uk.ph.s
## # A tibble: 3 x 9
## kelas estimate statistic p.value parameter conf.low conf.~1 method alter~2
## <fct> <dbl> <dbl> <dbl> <int> <dbl> <dbl> <chr> <chr>
## 1 XI IPS 1 0.516 15.0 1.11e-43 622 0.456 0.571 Pears~ two.si~
## 2 XI MIPA 0.341 10.0 2.65e-22 766 0.276 0.402 Pears~ two.si~
## 3 XI IPS 2 0.332 8.44 2.54e-16 574 0.258 0.403 Pears~ two.si~
## # ... with abbreviated variable names 1: conf.high, 2: alternative
data_wide <-
data %>%
group_by(name_code) %>%
pivot_wider(
names_from = ppp,
values_from = nilai_ppp,
values_fill = 0,
) %>%
pivot_wider(
names_from = penilaian_h,
values_from = nilai_ph,
values_fill = 0
) %>%
pivot_wider(
names_from = penilaian_s,
values_from = nilai,
) %>%
summarise_all(mean) %>%
select(-c(kelas, materi, exp))
data_wide
## # A tibble: 82 x 10
## name_code Proyek Praktek Porto~1 Haria~2 Haria~3 Haria~4 Haria~5 PAT PHBO
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 009c3fa52~ 26 26 26 19.5 19.5 19.5 19.5 85 88
## 2 03bfff0bb~ 26 26 26 19.5 19.5 19.5 19.5 85 78
## 3 0546753a8~ 28.6 26.8 28.6 19.5 21.5 21.2 21.7 87 93
## 4 060bd7b14~ 26.8 26.6 26.5 19.5 19.5 21 19.8 88 93
## 5 0b848e743~ 26 26.6 26 19.5 19.5 19.5 20.1 84 78
## 6 0d9cce6d7~ 30 27.3 29.8 21.2 23.2 21 21.7 91 93
## 7 0ec228726~ 26 26 26 19.5 19.5 19.5 19.5 84 78
## 8 0f407cdc9~ 26 26 26 19.5 19.5 19.5 19.5 83 78
## 9 13d13d8ba~ 26 26 26 19.5 19.5 19.5 19.5 85 78
## 10 13d47c3b4~ 26 26 26 19.5 19.5 19.5 19.5 86 87
## # ... with 72 more rows, and abbreviated variable names 1: Portofolio,
## # 2: `Harian 4`, 3: `Harian 2`, 4: `Harian 3`, 5: `Harian 1`
## # i Use `print(n = ...)` to see more rows
Use recipe to mark some column as ID, then normalise and process the rest.
process_rec <- # define recipe
recipe(~., data = data_wide) %>%
update_role(name_code, new_role = "id") %>%
step_dummy(all_nominal_predictors()) %>%
step_normalize(all_predictors())
process_prep <- prep(process_rec) #evaluate recipe
Before applying pca, let’s look at the prepared data.
juice(process_prep)
## # A tibble: 82 x 10
## name_code Proyek Praktek Porto~1 Haria~2 Haria~3 Haria~4 Haria~5 PAT
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 009c3fa52d00~ -0.748 -0.887 -0.684 -0.639 -0.411 -0.715 -0.750 0.190
## 2 03bfff0bb8d4~ -0.748 -0.887 -0.684 -0.639 -0.411 -0.715 -0.750 0.190
## 3 0546753a8f53~ 1.90 0.288 2.13 -0.639 1.76 1.92 2.16 0.766
## 4 060bd7b14727~ 0.0208 0.0271 -0.139 -0.639 -0.411 1.55 -0.304 1.05
## 5 0b848e743bee~ -0.748 0.0271 -0.684 -0.639 -0.411 -0.715 0.0317 -0.0983
## 6 0d9cce6d7691~ 3.35 1.20 3.40 1.55 3.57 1.55 2.16 1.92
## 7 0ec228726269~ -0.748 -0.887 -0.684 -0.639 -0.411 -0.715 -0.750 -0.0983
## 8 0f407cdc9537~ -0.748 -0.887 -0.684 -0.639 -0.411 -0.715 -0.750 -0.386
## 9 13d13d8ba02b~ -0.748 -0.887 -0.684 -0.639 -0.411 -0.715 -0.750 0.190
## 10 13d47c3b44a7~ -0.748 -0.887 -0.684 -0.639 -0.411 -0.715 -0.750 0.478
## # ... with 72 more rows, 1 more variable: PHBO <dbl>, and abbreviated variable
## # names 1: Portofolio, 2: `Harian 4`, 3: `Harian 2`, 4: `Harian 3`,
## # 5: `Harian 1`
## # i Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names
Now let’s apply step_pca().
pca_rec <-
process_rec %>%
step_pca(all_predictors())
pca_prep <- prep(pca_rec)
names(pca_prep)
## [1] "var_info" "term_info" "steps" "template"
## [5] "levels" "retained" "requirements" "tr_info"
## [9] "orig_lvls" "last_term_info"
Look at the pca results.
juice(pca_prep)
## # A tibble: 82 x 6
## name_code PC1 PC2 PC3 PC4 PC5
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 009c3fa52d0061d1892df4db8b157787143c2~ -1.51 -0.0756 1.03 -0.367 -0.463
## 2 03bfff0bb8d4586fb602da381c0b77c0d24b6~ -1.94 0.108 0.306 0.767 -0.207
## 3 0546753a8f539848cea9f24c8f2a82caf6432~ 4.02 1.93 0.919 -0.122 0.962
## 4 060bd7b14727a7dd031ddd77cce6db37577c2~ 0.725 -0.755 1.76 -0.487 0.554
## 5 0b848e743bee93a24ec2478f05351d293f5f3~ -1.42 0.0102 -0.145 0.708 0.500
## 6 0d9cce6d769144b8790139071d342aee4267f~ 6.73 2.03 -0.0470 0.795 -1.21
## 7 0ec2287262695dbaa3251e5df9c3aee0cee6c~ -2.01 0.185 0.120 0.589 -0.139
## 8 0f407cdc953730852f9938ce8d325e71b8d23~ -2.09 0.261 -0.0662 0.411 -0.0704
## 9 13d13d8ba02bea54041a02584536224960dce~ -1.94 0.108 0.306 0.767 -0.207
## 10 13d47c3b44a7b63db7b65d206fb1f0f0cd840~ -1.48 -0.134 1.14 -0.0753 -0.506
## # ... with 72 more rows
## # i Use `print(n = ...)` to see more rows
Find standard deviation in each PC from steps.
sdev <-
pca_prep$steps[[3]]$res$sdev
Calculate percent of variation with standard deviation.
percent_variation <- sdev^2 / sum(sdev^2)
var_df <- data.frame(PC=paste0("PC",1:length(sdev)),
var_explained=percent_variation,
stringsAsFactors = FALSE)
var_df
## PC var_explained
## 1 PC1 6.312045e-01
## 2 PC2 1.295237e-01
## 3 PC3 8.627531e-02
## 4 PC4 5.617098e-02
## 5 PC5 4.530960e-02
## 6 PC6 3.973402e-02
## 7 PC7 9.553585e-03
## 8 PC8 2.228303e-03
## 9 PC9 9.198070e-09
Plot.
var_df %>%
mutate(PC = fct_inorder(PC),
var_rounded = round(var_explained, digits = 2),
var_cum = cumsum(var_rounded)) %>%
ggplot(aes(x = PC,
y= var_explained,
fill = PC))+
geom_col()+
scale_fill_manual(
values = c(
"#C6E0FF",
"#579A9E",
"#3292C3",
"#BCAB79",
"maroon",
"white",
"violet",
"grey",
"brown",
"lightyellow"
)
)+
geom_label(aes(label = var_cum))+
labs(title = "PC Contributions to Variance Explained",
x = NULL,
y = "Var Explained")+
theme(legend.position = "none")
It takes 4 PC to retain 91% variance explained.
tidied_pca <- tidy(pca_prep,
number = 3) # the number of step in recipe, normalize is 2, pca is 3
tidied_pca %>%
filter(component %in% paste0("PC", 1:6)) %>%
mutate(component = fct_inorder(component)) %>%
ggplot(aes(value, terms, fill = terms)) +
geom_col(show.legend = FALSE) +
facet_wrap(~component, nrow = 1) +
labs(y = NULL)+
scale_fill_manual(
values = c(
"maroon",
"white",
"violet",
"grey",
"brown",
"lightyellow",
"#C6E0FF",
"#579A9E",
"#3292C3",
"#BCAB79"
)
)
Focus on the first 4 PC.
tidied_pca %>%
filter(component %in% paste0("PC", 1:4)) %>%
group_by(component) %>%
top_n(8, abs(value)) %>%
ungroup() %>%
mutate(terms = reorder_within(terms, abs(value), component)) %>%
ggplot(aes(abs(value), terms, fill = value > 0)) +
geom_col() +
facet_wrap(~component, scales = "free_y") +
scale_y_reordered() +
labs(
x = "Absolute value of contribution",
y = NULL, fill = "Positive?"
)+
scale_fill_manual(
values = c("#BEEF9E", "#3292C3")
)
The biggest divergence is at PC2, between Harian 2 and Harian 4.
# use juice() to transform int dataframe
juiced_pca <- juice(pca_prep)
juiced_pca
## # A tibble: 82 x 6
## name_code PC1 PC2 PC3 PC4 PC5
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 009c3fa52d0061d1892df4db8b157787143c2~ -1.51 -0.0756 1.03 -0.367 -0.463
## 2 03bfff0bb8d4586fb602da381c0b77c0d24b6~ -1.94 0.108 0.306 0.767 -0.207
## 3 0546753a8f539848cea9f24c8f2a82caf6432~ 4.02 1.93 0.919 -0.122 0.962
## 4 060bd7b14727a7dd031ddd77cce6db37577c2~ 0.725 -0.755 1.76 -0.487 0.554
## 5 0b848e743bee93a24ec2478f05351d293f5f3~ -1.42 0.0102 -0.145 0.708 0.500
## 6 0d9cce6d769144b8790139071d342aee4267f~ 6.73 2.03 -0.0470 0.795 -1.21
## 7 0ec2287262695dbaa3251e5df9c3aee0cee6c~ -2.01 0.185 0.120 0.589 -0.139
## 8 0f407cdc953730852f9938ce8d325e71b8d23~ -2.09 0.261 -0.0662 0.411 -0.0704
## 9 13d13d8ba02bea54041a02584536224960dce~ -1.94 0.108 0.306 0.767 -0.207
## 10 13d47c3b44a7b63db7b65d206fb1f0f0cd840~ -1.48 -0.134 1.14 -0.0753 -0.506
## # ... with 72 more rows
## # i Use `print(n = ...)` to see more rows
ggplot(juiced_pca, aes(PC1, PC2)) +
geom_point(alpha = 0.7, size = 2) +
labs(color = NULL)+
theme_minimal()
ggplot(juiced_pca, aes(PC1, PC3)) +
geom_point(alpha = 0.7, size = 2) +
labs(color = NULL)+
theme_minimal()
ggplot(juiced_pca, aes(PC1, PC4)) +
geom_point(alpha = 0.7, size = 2) +
labs(color = NULL)+
theme_minimal()
pca_rec2 <- # define recipe
recipe(~., data = data_wide) %>%
update_role(name_code, new_role = "id") %>%
step_normalize(all_predictors()) %>%
prep()
d4facto <- juice(pca_rec2)
d4facto
## # A tibble: 82 x 10
## name_code Proyek Praktek Porto~1 Haria~2 Haria~3 Haria~4 Haria~5 PAT
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 009c3fa52d00~ -0.748 -0.887 -0.684 -0.639 -0.411 -0.715 -0.750 0.190
## 2 03bfff0bb8d4~ -0.748 -0.887 -0.684 -0.639 -0.411 -0.715 -0.750 0.190
## 3 0546753a8f53~ 1.90 0.288 2.13 -0.639 1.76 1.92 2.16 0.766
## 4 060bd7b14727~ 0.0208 0.0271 -0.139 -0.639 -0.411 1.55 -0.304 1.05
## 5 0b848e743bee~ -0.748 0.0271 -0.684 -0.639 -0.411 -0.715 0.0317 -0.0983
## 6 0d9cce6d7691~ 3.35 1.20 3.40 1.55 3.57 1.55 2.16 1.92
## 7 0ec228726269~ -0.748 -0.887 -0.684 -0.639 -0.411 -0.715 -0.750 -0.0983
## 8 0f407cdc9537~ -0.748 -0.887 -0.684 -0.639 -0.411 -0.715 -0.750 -0.386
## 9 13d13d8ba02b~ -0.748 -0.887 -0.684 -0.639 -0.411 -0.715 -0.750 0.190
## 10 13d47c3b44a7~ -0.748 -0.887 -0.684 -0.639 -0.411 -0.715 -0.750 0.478
## # ... with 72 more rows, 1 more variable: PHBO <dbl>, and abbreviated variable
## # names 1: Portofolio, 2: `Harian 4`, 3: `Harian 2`, 4: `Harian 3`,
## # 5: `Harian 1`
## # i Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names
# library(FactoMineR)
facto_pca <- PCA(
X = d4facto,
scale.unit = F,
quali.sup = c(1),
ncp = 6,
graph = F #disable visualization
)
head(facto_pca$ind$coord)
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6
## 1 -1.5085935 0.07555772 1.03099615 -0.3669759 0.4632670 0.1705318
## 2 -1.9374308 -0.10820413 0.30631482 0.7674604 0.2073614 -0.1603130
## 3 4.0190829 -1.93467835 0.91855086 -0.1216915 -0.9618886 -0.2528085
## 4 0.7250594 0.75528856 1.75767341 -0.4873821 -0.5536845 -0.9457349
## 5 -1.4153958 -0.01020121 -0.14503239 0.7081886 -0.5000483 0.3525309
## 6 6.7304121 -2.03072138 -0.04698073 0.7946486 1.2067587 -0.1802793
plot.PCA(
x = facto_pca,
choix = "ind", # individual factor map# coloring
select = "contrib 1", # 5 most contributing factor
invisible = "quali"
)
d4facto[71,]
## # A tibble: 1 x 10
## name_code Proyek Praktek Porto~1 Haria~2 Haria~3 Haria~4 Haria~5 PAT PHBO
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 9734ce6692~ -0.748 -0.887 -0.684 -0.639 -0.411 -0.715 -0.750 -1.83 -1.01
## # ... with abbreviated variable names 1: Portofolio, 2: `Harian 4`,
## # 3: `Harian 2`, 4: `Harian 3`, 5: `Harian 1`
thename <- data_wide[71,] %>% pull(name_code)
data_wide %>%
filter(name_code == thename)
## # A tibble: 1 x 10
## name_code Proyek Praktek Porto~1 Haria~2 Haria~3 Haria~4 Haria~5 PAT PHBO
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 9734ce6692~ 26 26 26 19.5 19.5 19.5 19.5 78 78
## # ... with abbreviated variable names 1: Portofolio, 2: `Harian 4`,
## # 3: `Harian 2`, 4: `Harian 3`, 5: `Harian 1`
plot.PCA(
x = facto_pca,
choix = "var",
autoLab = "yes"
)
plot.PCA(
x = facto_pca,
choix = "var",
select = "contrib 1"
)
The “Ranah Keterampilan” that’s most correlated with the semester exam score (PAT) is “Praktek” assignment. But, the variable that has the most contribution the the two dimension the graph was built on, is “Proyek”.
Reuse d4facto, but without the first 3 columns.
d4kmeans <- d4facto %>%
select(-1)
head(d4kmeans)
## # A tibble: 6 x 9
## Proyek Praktek Portofolio `Harian 4` Harian ~1 Haria~2 Haria~3 PAT PHBO
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 -0.748 -0.887 -0.684 -0.639 -0.411 -0.715 -0.750 0.190 0.477
## 2 -0.748 -0.887 -0.684 -0.639 -0.411 -0.715 -0.750 0.190 -1.01
## 3 1.90 0.288 2.13 -0.639 1.76 1.92 2.16 0.766 1.22
## 4 0.0208 0.0271 -0.139 -0.639 -0.411 1.55 -0.304 1.05 1.22
## 5 -0.748 0.0271 -0.684 -0.639 -0.411 -0.715 0.0317 -0.0983 -1.01
## 6 3.35 1.20 3.40 1.55 3.57 1.55 2.16 1.92 1.22
## # ... with abbreviated variable names 1: `Harian 2`, 2: `Harian 3`,
## # 3: `Harian 1`
We have to decide the numbers of K. Let’s simulate 9 Ks.
kmsim <-
tibble(k = 1:9)
kmsim
## # A tibble: 9 x 1
## k
## <int>
## 1 1
## 2 2
## 3 3
## 4 4
## 5 5
## 6 6
## 7 7
## 8 8
## 9 9
Create kmeans clustering for each number of K.
set.seed(1)
kmsim <- kmsim %>%
mutate(
# cluster the data 9 times
kclust = map(k, ~ kmeans(d4kmeans, .))
)
kmsim
## # A tibble: 9 x 2
## k kclust
## <int> <list>
## 1 1 <kmeans>
## 2 2 <kmeans>
## 3 3 <kmeans>
## 4 4 <kmeans>
## 5 5 <kmeans>
## 6 6 <kmeans>
## 7 7 <kmeans>
## 8 8 <kmeans>
## 9 9 <kmeans>
Apply tidy, glance, and augment to each kclust.
set.seed(1)
kmsim <- kmsim %>%
mutate(
glanced = map(kclust, glance)
)
kmsim
## # A tibble: 9 x 3
## k kclust glanced
## <int> <list> <list>
## 1 1 <kmeans> <tibble [1 x 4]>
## 2 2 <kmeans> <tibble [1 x 4]>
## 3 3 <kmeans> <tibble [1 x 4]>
## 4 4 <kmeans> <tibble [1 x 4]>
## 5 5 <kmeans> <tibble [1 x 4]>
## 6 6 <kmeans> <tibble [1 x 4]>
## 7 7 <kmeans> <tibble [1 x 4]>
## 8 8 <kmeans> <tibble [1 x 4]>
## 9 9 <kmeans> <tibble [1 x 4]>
Unnest glanced
clustering <-
kmsim %>%
unnest(cols = glanced)
clustering %>% head()
## # A tibble: 6 x 6
## k kclust totss tot.withinss betweenss iter
## <int> <list> <dbl> <dbl> <dbl> <int>
## 1 1 <kmeans> 729 729. 5.68e-13 1
## 2 2 <kmeans> 729 390. 3.39e+ 2 1
## 3 3 <kmeans> 729 288. 4.41e+ 2 2
## 4 4 <kmeans> 729 243. 4.86e+ 2 3
## 5 5 <kmeans> 729 204. 5.25e+ 2 2
## 6 6 <kmeans> 729 183. 5.46e+ 2 3
ggplot(clustering, aes(k, tot.withinss))+
geom_line()+
geom_point()
There are 3 centers before the tot.withinss decrease slowed down. That’s a good enough elbow. So our kmeans is:
thek <- kmsim %>%
filter(k == 3) %>%
pull(kclust)
Assign the cluster identification to a new column in d4kmeans
d4kmeans$cluster <- thek[[1]][[1]]
student_score_profile <-
d4kmeans %>%
group_by(cluster) %>%
summarise_all(mean)
student_score_profile
## # A tibble: 3 x 10
## cluster Proyek Praktek Portofo~1 Haria~2 Haria~3 Haria~4 Haria~5 PAT PHBO
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 1.88 0.745 1.98 0.271 2.21 1.07 1.56 0.646 0.935
## 2 2 0.315 0.946 0.184 1.02 -0.324 0.681 0.144 0.512 0.607
## 3 3 -0.677 -0.724 -0.630 -0.639 -0.411 -0.665 -0.496 -0.457 -0.587
## # ... with abbreviated variable names 1: Portofolio, 2: `Harian 4`,
## # 3: `Harian 2`, 4: `Harian 3`, 5: `Harian 1`
library(ggiraphExtra)
ggRadar(
data=student_score_profile,
mapping = aes(colours = cluster),
ylim = 0.8
)+
facet_wrap(~cluster)
d4kmeans %>%
head()
## # A tibble: 6 x 10
## Proyek Praktek Porto~1 Haria~2 Haria~3 Haria~4 Haria~5 PAT PHBO cluster
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
## 1 -0.748 -0.887 -0.684 -0.639 -0.411 -0.715 -0.750 0.190 0.477 3
## 2 -0.748 -0.887 -0.684 -0.639 -0.411 -0.715 -0.750 0.190 -1.01 3
## 3 1.90 0.288 2.13 -0.639 1.76 1.92 2.16 0.766 1.22 1
## 4 0.0208 0.0271 -0.139 -0.639 -0.411 1.55 -0.304 1.05 1.22 2
## 5 -0.748 0.0271 -0.684 -0.639 -0.411 -0.715 0.0317 -0.0983 -1.01 3
## 6 3.35 1.20 3.40 1.55 3.57 1.55 2.16 1.92 1.22 1
## # ... with abbreviated variable names 1: Portofolio, 2: `Harian 4`,
## # 3: `Harian 2`, 4: `Harian 3`, 5: `Harian 1`
#library(factoextra)
set.seed(1)
x <- kmeans(d4kmeans, 3)
fviz_cluster(object = x,
data = d4kmeans)
km_x_pca <- PCA(X = d4kmeans, # this table has already added with the cluster
quali.sup = 10,
graph=F)
fviz_pca_biplot(km_x_pca,
habillage = "cluster",
geom.ind = "point", # menampilkan titik observasi saja
addEllipses = T, # membuat elips disekitar cluster
col.var = "navy"
)
Summary: